

* Start from an empty session and close any log that is still open
* (capture keeps the script running when no log is open).
clear all
capture log close

* Write the output of this step to its own log file, overwriting an older one.
log using "${logfiles}step08.log", replace


// The commented-out steps below generate firm-level characteristics from the FLEED and FOLK data, which are then used as input for the matching and for later analysis. Uncomment and run them the first time; afterwards they can stay commented out.

/** Fleed data 
forvalues Y=2000 (1) 2016 {
use shnro sykstun sukup ika ututku tyotu toimiala ammattikoodi using"D:\ready-made\FLEED_TOTAL\2016\fleed_kokonais_`Y'.dta", clear
gen year=`Y' 
save "${dataout}total_plant_employee_`Y'", replace
} 

* FOLK data
forvalues Y=2017 (1) 2018{
    local cond "vuosi==`Y'"
	use shnro sukup ika ututku_ala vuosi if `cond' using "D:\ready-made\FOLK_perus_11a\folk_20112020_tua_perus21tot_1.dta" , clear
	
save "${dataout}FOLK_gender_`Y'.dta", replace

	
	use shnro vuosi tyotu if `cond' using"D:\ready-made\FOLK_tulo_11a\folk_20112019_tua_tulo21tot_1.dta", clear

save "${dataout}FOLK_earnings_`Y'.dta", replace


	use shnro sykstun toimiala vuosi if `cond' using "D:\ready-made\FOLK_tkt_11a\folk_20112018_tua_tkt21tot_1.dta" , clear
	

merge 1:1 shnro using "${dataout}FOLK_gender_`Y'.dta"
keep if _merge==3 
drop _merge 


merge 1:1 shnro using "${dataout}FOLK_earnings_`Y'.dta"
keep if _merge==3 
drop _merge 
	

drop vuosi 
gen year=`Y'
save "${dataout}total_plant_employee_`Y'", replace
}


use "${dataout}total_plant_employee_2000", clear
foreach Z of numlist 2001/2018 {
append using "${dataout}total_plant_employee_`Z'"
}

save "${dataout}total_plant_employee_allyears", replace


use "${dataout}total_plant_employee_allyears", clear
**** Generate firm characteristics for all firms 
drop if sykstun=="" 
gen employee=1
gen earnings=tyotu
replace earnings=0 if earnings==.



*INDUSTRY AND OCCUPATION FOR EWCS DATA

	gen nace=real(substr(toimiala, 1,2))
	destring nace, replace 
	label var nace "Industry"
	
	
	* Demographics
	gen age=ika 
	gen female_employee=(sukup=="2")
	gen male_employee=(sukup=="1")
	
	
	gen female_earnings=earnings if female_employee==1
	gen male_earnings=earnings if male_employee==1
	

	
	*Education variables
	g educ=ututku 
	destring educ, replace
	gen level=substr(ututku, 1,1)
	destring level, replace
	* Recode missing education as its own level: 0
	replace level=0 if level==.
	
	*** Dummy variable for education 
	gen college=level>=6
	gen high_school=level>=3 & level<=5
	gen ho=level<3
	
	
	*Occupation codes
	gen occ=real(substr(ammattikoodi, 1,1))
	
	gen manager=(occ==1)
gen male_manager=manager*male_employee
gen female_manager=manager*female_employee
	

sort shnro sykstun year
bysort shnro sykstun: egen tenure=total(employee)




keep employee female_employee earnings age level nace tenure sykstun year male_employee female_earnings male_earnings manager female_manager male_manager college high_school ho 



*** Create female share of top 10% earners in the industry 
bys nace year: gen total_employee = _N
g thresh = floor(0.9*total_employee)

sort year nace earnings
by year nace : gen e_rank = _n
g top_earner = (e_rank > thresh)
g female_top_earner = female_employee*top_earner
bys year nace : egen total_female_top = total(female_top_earner)
g share_ftop_ind = total_female_top/(total_employee - thresh) 


drop total_employee thresh top_earner female_top_earner total_female_top
drop e_rank 



*** Create female share of top 10% earners in the firm 
bys sykstun year: gen total_employee = _N
g thresh = floor(0.9*total_employee)

sort year sykstun earnings
by year sykstun : gen e_rank = _n
g top_earner = (e_rank > thresh)
g female_top_earner = female_employee*top_earner
bys year sykstun : egen total_female_top = total(female_top_earner)
g share_female_top = total_female_top/(total_employee - thresh) 


* Collapse to one row per plant and year: head counts are summed, the other characteristics are averaged (median for nace)
collapse (sum) employee (sum) female_employee (sum) male_employee (sum) male_manager (sum) female_manager (mean) earnings (mean) female_earnings (mean) male_earnings (mean) age  (mean) level (mean) tenure (median) nace (mean) college (mean) high_school (mean) ho (mean) share_ftop_ind  (mean) share_female_top, by(sykstun year)


* Year-on-year change in head count within each plant; turnover is computed further below as this change minus new hires
sort sykstun year
bysort sykstun: gen emp_change=employee-employee[_n-1]

bysort sykstun: gen emp_change_female=female_employee-female_employee[_n-1]

bysort sykstun: gen emp_change_male=male_employee-male_employee[_n-1]


* Generate share of female employees 
gen share_female=female_employee/employee

duplicates report sykstun
* Check the last year that firm shows in the data 
sort sykstun year 
bysort sykstun: egen last_year=max(year)
gen plant_closed_year=last_year+1

* Deflated prices 
fmerge m:1 year using  "${dataout}\cpi"
drop if _merge==2 
drop _merge 

replace earnings=earnings/cpi
replace female_earnings=female_earnings/cpi
replace male_earnings=male_earnings/cpi

replace nace=int(nace)


*Merge with newhires calculated from a different file 
merge 1:1 sykstun year using "${dataout}newhire" 
drop _merge 

replace newhire=0 if newhire==. 
replace female_newhire=0 if female_newhire==. 
replace male_newhire=0 if male_newhire==. 

*Use total employee change and newhires to calculate turnover rate
gen switch=emp_change-newhire
gen switch_female=emp_change_female-female_newhire
gen switch_male=emp_change_male-male_newhire

replace switch=0 if switch==. 
replace  switch_female=0 if switch_female==. 
replace switch_male=0 if switch_male==. 


replace nace=0 if nace==. 
save "${dataout}plant_char_allyears", replace 


forvalues Y=2000 (1) 2018{
preserve 
keep if year==`Y'
save "${dataout}plant_char_`Y'", replace 
restore 

}



// generate industry level % women in top jobs 

use "${dataout}total_plant_employee_allyears", clear 
drop if toimiala==""

gen nace=real(substr(toimiala, 1,2))
	destring nace, replace 
	label var nace "Industry"
	
	 gen earnings=tyotu
	
	gen age=ika 
	gen female_employee=(sukup=="2")
	gen male_employee=(sukup=="1")
	
keep female_employee earnings sykstun nace year

*** Create female share of top 10% earners in the industry 
bys nace year: gen total_employee = _N
g thresh = floor(0.9*total_employee)

sort year nace earnings
by year nace : gen e_rank = _n
g top_earner = (e_rank > thresh)
g female_top_earner = female_employee*top_earner
bys year nace : egen total_female_top = total(female_top_earner)
g share_ftop_ind = total_female_top/(total_employee - thresh) 

collapse (mean) share_ftop_ind, by(nace year)

ren year year_event
save "${dataout}total_industry_topfemale", replace 



	// generate firm level % women in top jobs 

use "${dataout}total_plant_employee_allyears", clear 
drop if toimiala==""

gen nace=real(substr(toimiala, 1,2))
	destring nace, replace 
	label var nace "Industry"
	
	 gen earnings=tyotu
	
	gen age=ika 
	gen female_employee=(sukup=="2")
	gen male_employee=(sukup=="1")
	
keep female_employee earnings sykstun nace year

*** Create female share of top 5%, 10%, 15% and 20% earners in the firm

sort year sykstun earnings
by year sykstun : gen e_rank = _n
bys sykstun year: gen total_employee = _N

forvalues i=5(5)20 {

g thresh`i' = floor((100-`i')/100*total_employee)


g top_earner`i' = (e_rank > thresh`i')
g female_top_earner`i' = female_employee*top_earner`i'
bys year sykstun : egen total_female_top`i' = total(female_top_earner`i')
g share_female_top`i' = total_female_top`i'/(total_employee - thresh`i') 
} 


collapse (mean) share_female_top5 share_female_top10 share_female_top15 share_female_top20, by(sykstun year)

rename year year_event
save "${dataout}total_firm_topfemale", replace 
*/
	
* Fix the random-number seed for everything that follows.
set seed 123456

*** Identify the treatment firms, i.e. the plants where violence happened
use "$dataout\allmatches_allyears_matchpast5", clear

* Stricter workplace-crime flag: victim and defendant have the same, non-missing
* lagged plant id. The flag is then spread to every row of the same match and
* event year.
generate new_wp_crime = (victim_sykstun_lag == defendant_sykstun_lag) & (defendant_sykstun_lag != "")
bysort match_id1 year_event: ereplace new_wp_crime = max(new_wp_crime)

* Couple indicator for the robustness checks. The couple file is keyed on
* shnro / defendant_shnro / year_event, so the victim id is moved into shnro
* before merging. Rows that exist only in the couple file are discarded.
rename shnro defendant_shnro
rename victim_shnro shnro
merge m:1 shnro defendant_shnro year_event using "${dataout}victim_couple_status", keepusing(couple)
drop if _merge == 2
drop _merge

* Keep match_id1 only. It is parked under a name that the wildcard match_id*
* does not cover, the remaining match_id* variables are dropped, and the name
* is restored.
rename match_id1 matchid
drop match_id*
rename matchid match_id1

* For the event-year row (time==0) take the plant id from the preceding row of
* the same person and event year.
sort shnro year_event time
bysort shnro year_event: replace sykstun = sykstun[_n-1] if time == 0

* Only event-year rows of workplace-crime cases with a known plant id survive.
keep if wp_crime == 1 & time == 0 & sykstun != ""

* Defendant/plaintiff sex combinations ("1" with "1", "1" with "2").
generate mm = (sukup == "1" & plaintiff_sex == "1")
generate mf = (sukup == "1" & plaintiff_sex == "2")

* One row per plant and year: a flag is set if it holds for any case in the
* plant-year; couple is set only if it holds for every case.
collapse (mean) wp_crime                                   ///
         (max)  manager_sub manager_sub2 mm mf new_wp_crime ///
         (min)  couple,                                     ///
         by(sykstun year)

generate year_event = year

save "$dataout\treatment_firms.dta", replace

* Attach the plant-level income gap to the treatment firms.
* NOTE(review): rows found only in the using file (_merge==2) are kept and
* _merge stays in the saved data, exactly as before - confirm this is intended.
merge m:1 sykstun year_event using "$dataout\treatment_violent_firms_income_pct_gap.dta"

* Median income gap among plants with mf==1
sum income_gap_pct if mf==1, d
local med = r(p50)

* incgap = 1 when the gap is above that median. Stata orders missing values
* above every number, so without the if-condition a plant with no gap measure
* would be coded as "above the median"; such plants now get a missing incgap.
gen incgap = (income_gap_pct > `med') if !missing(income_gap_pct)

save "$dataout\treatment_firms.dta", replace






*Find firm matches
* For every event year Y the treatment plants of that year are combined with
* the plant characteristics of Y and of the five preceding years, and each
* treated plant is paired with its nearest untreated neighbour. The result is
* written to firm_allmatches_long_Y_control.dta (matched controls followed by
* the treated plants).
forvalues Y = 2006(1)2013 {

	use "$dataout\treatment_firms.dta", clear
	keep sykstun year_event year wp_crime manager_sub manager_sub2 mm mf incgap new_wp_crime couple
	keep if year_event == `Y'

	* Plant characteristics pulled from every yearly file
	local pc_vars employee female_employee switch newhire earnings nace switch_female female_newhire

	* Event-year characteristics. All rows are kept here: plants that appear
	* only in the plant file are the pool of potential controls.
	merge m:1 sykstun using "${dataout}plant_char_`Y'", keepusing(`pc_vars')
	drop _merge

	* Every merged variable gets the _0 suffix. switch_female and
	* female_newhire used to keep their plain names, which stopped the lag-1
	* merge from bringing in its own values (merge never overwrites variables
	* already in memory), so the *_1 versions silently held event-year data.
	foreach v of local pc_vars {
		rename `v' `v'_0
	}
	gen share_female_0 = female_employee_0/employee_0

	* Characteristics of the five years before the event; a plant must be
	* present in each of them to stay in the sample.
	forvalues lag = 1/5 {
		local lagyear = `Y' - `lag'

		merge m:1 sykstun using "${dataout}plant_char_`lagyear'", keepusing(`pc_vars')
		keep if _merge == 3
		drop _merge

		foreach v of local pc_vars {
			rename `v' `v'_`lag'
		}
		gen share_female_`lag' = female_employee_`lag'/employee_`lag'
	}

	* Hiring and turnover in each lag year, scaled by event-year head count
	forvalues lag = 1/5 {
		gen share_newhire_`lag' = newhire_`lag'/employee_0
	}
	forvalues lag = 1/5 {
		gen share_fnewhire_`lag' = female_newhire_`lag'/employee_0
	}
	forvalues lag = 1/5 {
		gen share_switch_`lag' = switch_`lag'/employee_0
	}
	forvalues lag = 1/5 {
		gen share_switch_female_`lag' = switch_female_`lag'/employee_0
	}

	* Control rows come from the plant files and have no year yet; give every
	* row the largest year found in memory.
	ereplace year = max(year)

	* Plants without a treatment record are controls
	replace wp_crime = 0 if wp_crime == .

	* Cap the control pool: controls beyond row 100000 are dropped.
	* NOTE(review): this depends on the current row order and is not a random
	* draw - confirm this is the intended way to thin the pool.
	drop if _n > 100000 & wp_crime == 0

	forvalues lag = 1/5 {
		gen log_ave_earnings_`lag' = log(earnings_`lag' + 1)
	}
	forvalues lag = 1/5 {
		gen log_employee_`lag' = log(employee_`lag')
	}

	drop if sykstun == ""

	* Constant placeholder outcome; only the matches are of interest
	gen outcomes = 1

	sort sykstun wp_crime

	* First pass: flag observations that violate the overlap assumption.
	* capture lets the loop continue when teffects stops with an error.
	* Note that share_switch_* also covers share_switch_female_*.
	capture noisily teffects nnmatch (outcomes log_employee_* log_ave_earnings* share_female_* share_newhire_* nace_* share_switch_*) (wp_crime), osample(new)

	* Drop obs that violate overlap assumptions. The guard covers the case in
	* which the first pass did not create the indicator.
	capture confirm variable new
	if _rc == 0 {
		drop if new == 1
	}

	* Second pass: store the row number of each observation's nearest
	* neighbour in match_id1 (further match_id# variables hold ties)
	teffects nnmatch (outcomes log_employee_* log_ave_earnings* share_female_* share_newhire_* nace_* share_switch_*) (wp_crime), gen(match_id)

	* Find their matches and matched ids
	save "$dataout\complete", replace

	* Row number in the order used by teffects, so that match_id1 of a treated
	* plant can be compared with the row number of its control. Stored as long
	* so the integer is held exactly.
	gen long rownum = _n

	preserve
	keep if wp_crime == 1 & !missing(match_id1)
	save "$dataout\treatment", replace
	restore

	* Controls are re-keyed by their own row number ...
	keep if wp_crime == 0 & !missing(match_id1)
	drop match_id1
	rename rownum match_id1

	save "$dataout\control", replace

	* ... and kept only if some treated plant points at them. A control that
	* was chosen by several treated plants appears once per treated plant.
	merge 1:m match_id1 using "$dataout\treatment", keepusing(match_id1)
	keep if _merge == 3
	drop _merge
	tab match_id1

	append using "$dataout\treatment"

	save "$dataout\firm_allmatches_long_`Y'_control.dta", replace

}


* Create unique match_ids
* A control that was matched to k treated plants is stored k times under one
* match_id1, next to those k treated plants. Each (control copy, treated
* plant) pair needs an id of its own. The first pair keeps the original id;
* every further pair gets a fresh id above the largest id in the file.
* The previous hand-written offsets (+max, +max+1, ...) could give the same new
* id to pairs from neighbouring match ids and handled at most five pairs.
forvalues Y = 2006(1)2013 {
	use "$dataout\firm_allmatches_long_`Y'_control.dta", clear

	*** Make sure each match has a unique id
	* Position of a row among the rows that share its match id and treatment
	* status: the j-th control copy is paired with the j-th treated plant.
	bysort match_id1 wp_crime: gen long pair_no = _n

	* Largest id currently in use
	summarize match_id1, meanonly
	local max_id = r(max)

	* Number the extra pairs 1, 2, 3, ... across the whole file and place them
	* above the largest existing id, so no two pairs can share an id.
	count if pair_no > 1
	if r(N) > 0 {
		egen long extra_id = group(match_id1 pair_no) if pair_no > 1
		replace match_id1 = `max_id' + extra_id if pair_no > 1
		drop extra_id
	}
	drop pair_no

	* Diagnostic: every id should now cover exactly two rows
	bysort match_id1: gen temp = _N
	tab temp
	drop temp

	save "$dataout\firm_allmatches_`Y'_control.dta",replace

}



// With the matches in hand, attach each matched plant's yearly data for the
// five years before to the five years after the event. One file is written
// per event year and calendar year.
forvalues Y = 2006/2013 {

	forvalues a = -5/5 {

		* Calendar year that belongs to event time `a'
		local k = `Y' + `a'

		use "$dataout\firm_allmatches_`Y'_control.dta", clear

		* Remove the matching-stage variables with these name stems so they do
		* not get in the way of the yearly plant variables merged in below
		drop employee* earnings* share_female*

		generate time = `a'

		* Control rows carry no event year; fill in the one found in the file
		ereplace year_event = max(year_event)

		* Plant characteristics of year `k'; plants that are only in the plant
		* file are not part of the matched sample and are discarded
		merge m:1 sykstun using "${dataout}plant_char_`k'"
		drop if _merge == 2

		save "$dataout\firm_allmatches_long_`Y'_`k'_control.dta", replace

	}

}



// Stack the yearly files of all event years into one firm panel.
// The 2013 cohort goes first: its last calendar year (2018) is loaded and the
// years 2008-2017 are appended behind it.
use "$dataout\firm_allmatches_long_2013_2018_control.dta", clear

forvalues a = 2008/2017 {
	append using "$dataout\firm_allmatches_long_2013_`a'_control.dta"
}

// Remaining cohorts, each with its eleven calendar years Y-5 ... Y+5
forvalues Y = 2006/2012 {
	local first = `Y' - 5
	local last  = `Y' + 5
	forvalues a = `first'/`last' {
		append using "$dataout\firm_allmatches_long_`Y'_`a'_control.dta"
	}
}

* Calendar year implied by event year and event time
replace year = year_event + time




* Base-year (time==0) head counts: all employees, female employees and male
* employees. The same three steps are applied to each of them:
*   1. take the count from the event-year row,
*   2. if that count is missing, fall back on the preceding row of the same
*      plant once the data are ordered by plant and year,
*   3. copy the value to every row of the plant within the event year.
* NOTE(review): a plant can appear in several event-year cohorts, so the
* "preceding row" in step 2 is not necessarily the previous calendar year -
* confirm that this fallback is intended.
local sources employee female_employee male_employee
local targets baseline_employee baseline_fe baseline_me

forvalues j = 1/3 {
	local src : word `j' of `sources'
	local tgt : word `j' of `targets'

	generate `tgt' = `src' if time==0
	sort sykstun year
	bysort sykstun: replace `tgt' = `src'[_n-1] if `src'==. & time==0
	bysort sykstun year_event: ereplace `tgt' = max(`tgt')
}

* Match ids are only unique within an event year; renumber them so that every
* (match id, event year) combination has its own id
gegen match_id1 = group(match_id1 year_event), replace


save "${dataout}turnover_allyears_violentfirms_old", replace 
